import io
import jieba
# Word-frequency report for a poem corpus: segment the text with jieba, count
# the Chinese words, save the 100 most frequent to a text file and print them
# five per row.

# Read the whole corpus; the context manager closes the file promptly.
with io.open("所有诗歌内容.txt", "r", encoding='utf-8') as src:
    txt = src.read()

# Segment the text into a list of word tokens.
words = jieba.lcut(txt)

# Count only tokens made up entirely of characters in U+4E00..U+9FFF.
# Comparing the whole token against a single character would be a
# lexicographic comparison that effectively inspects just the first
# character, so every character is checked individually instead.
counts = {}
for word in words:
    if word and all('\u4e00' <= ch <= '\u9fff' for ch in word):
        counts[word] = counts.get(word, 0) + 1

print("所有诗词前100高频词：")

# Sort by descending frequency; the sort is stable, so words with equal counts
# keep the order in which the dict yields them.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Slicing (rather than indexing 0..99) avoids an IndexError when the corpus
# contains fewer than 100 distinct Chinese words.
top_items = items[:100]

# Save the top entries to a text file, one "word  count" pair per line.
with open('词频.txt', 'w', encoding='utf-8') as wr:
    for word, count in top_items:
        wr.write(u"{0:<10}{1:>5}".format(word, count) + "\n")

# Print the same entries, five per row, separated by "|" and a tab.
for i, (word, count) in enumerate(top_items):
    if (i + 1) % 5 != 0:
        print(u"{0:<10}{1:>5}".format(word, count), '|\t', end=' ')
    else:
        print(u"{0:<10}{1:>5}".format(word, count))

# Terminate the last row when it holds fewer than five entries.
if len(top_items) % 5 != 0:
    print()